PBTA ActMiR Results Analysis

Author: Shehbeel Arif

Preclinical Laboratory Research Unit

The Center for Data Driven Discovery in Biomedicine (D3b)

Children's Hospital of Philadelphia

In [5]:
# Library for handling data
import pandas as pd

# Library for UMAP projections
from umap.umap_ import UMAP

# Library for visualization
import plotly.express as px
In [2]:
# Read the HOPE miRNA-activity results table from the working directory
df = pd.read_csv(filepath_or_buffer='hope_miRactivity_results.csv')

# Bare final expression so the notebook renders the loaded frame
df
Out[2]:
Kids_First_Biospecimen_ID sample_id aliquot_id RNA_library short_histology molecular_subtype age_at_diagnosis_days reported_gender age_group MIMAT0010195 ... MIMAT0004986 MIMAT0004987 MIMAT0000094 MIMAT0026473 MIMAT0004510 MIMAT0000095 MIMAT0022842 MIMAT0000097 MIMAT0004678 MIMAT0000689
0 BS_VF5XWFXD 7316-2594 574553 stranded HGAT DMG, H3 K28, TP53 loss 8 Male 5-10 0.046957 ... -0.055680 0.019986 0.303817 -0.039460 -0.012004 0.149087 0.032207 0.293745 -0.008587 0.184319
1 BS_W60RQAKK 7316-371 734516 stranded HGAT HGG, To be classified 5 Female 0-5 0.184673 ... 0.117588 0.138313 0.404067 0.093777 0.093444 0.253196 0.126721 0.403668 0.119945 0.299600
2 BS_TK84K675 7316-2751 711698 stranded HGAT HGG, H3 wildtype 3 Female 0-5 0.105755 ... 0.028216 0.057141 0.239396 0.027004 0.072512 0.151019 0.115442 0.283237 0.088276 0.205366
3 BS_HWGWYCY7 7316-1455 400856 poly-A HGAT HGG, To be classified 2 Female 0-5 -0.104705 ... 0.054945 -0.063921 -0.013288 -0.005415 0.080462 0.034499 -0.035228 0.001027 0.142655 0.086316
4 BS_RX1YTZ7F 7316-388 398449 poly-A HGAT DMG, H3 K28, TP53 activated 5 Female 0-5 0.113665 ... 0.032385 0.063875 0.341264 0.028191 0.050142 0.189022 0.087262 0.338444 0.034852 0.213586
5 BS_XZM79E42 7316-409 398448 poly-A HGAT DMG, H3 K28, TP53 activated 12 Female 10-15 0.097259 ... -0.004175 0.044509 0.271858 0.006494 0.033367 0.142336 0.086633 0.236862 0.067023 0.134444
6 BS_XQ5SFW35 7316-89 577715 stranded HGAT HGG, H3 wildtype 4 Male 0-5 0.134111 ... 0.040414 0.103520 0.369665 0.038248 0.061328 0.207337 0.117338 0.378433 0.049278 0.259048
7 BS_MX23ZY0Y 7316-195 739938 stranded HGAT DMG, H3 K28, TP53 loss 9 Male 5-10 -0.039391 ... -0.081045 -0.076824 0.215427 -0.108573 -0.122767 0.074374 -0.091994 0.190338 -0.066512 0.079004
8 BS_4PPHAQXF 7316-870 398453 poly-A HGAT HGG, H3 wildtype 10 Female 5-10 0.080769 ... -0.010550 -0.003904 0.278134 -0.054702 -0.024307 0.113260 0.039207 0.280828 0.012331 0.172361
9 BS_0RQ4P069 7316-1746 739942 stranded HGAT HGG, H3 wildtype, TP53 loss 6 Female 5-10 -0.151490 ... -0.085015 -0.252793 -0.226109 -0.139109 -0.086405 -0.268872 -0.104461 -0.237889 0.019412 -0.246581
10 BS_H0QWRJE2 7316-2146 564445 stranded HGAT HGG, H3 wildtype 3 Female 0-5 0.028185 ... -0.075678 -0.071681 0.179788 -0.075186 -0.003470 0.073911 0.026267 0.198661 0.041545 0.144978
11 BS_49BQS7Z6 7316-2151 711713 stranded HGAT HGG, H3 wildtype 6 Male 5-10 0.098388 ... -0.004243 0.026573 0.298890 -0.008147 0.036768 0.111029 0.089668 0.309467 0.047102 0.227748
12 BS_A0DYVX9J 7316-2140 470436 stranded HGAT HGG, H3 wildtype, TP53 loss 20 Male >15 0.077432 ... -0.000316 0.011083 0.312797 -0.039469 -0.013298 0.188620 0.030691 0.288824 0.041812 0.195492
13 BS_0VXZCRJS 7316-466 401561 poly-A HGAT DMG, H3 K28 7 Male 5-10 0.023542 ... -0.109889 -0.044252 0.293370 -0.123075 -0.109265 0.100999 -0.034502 0.277360 -0.080691 0.115090
14 BS_1A6MQ9ZA 7316-1769 470030 stranded HGAT DMG, H3 K28 15 Male 10-15 0.069749 ... -0.037707 0.033932 0.333838 -0.039091 0.003598 0.195120 0.032581 0.319072 0.022263 0.208367
15 BS_AWH9757B 7316-3769 728271 stranded HGAT HGG, H3 wildtype 9 Female 5-10 0.216734 ... 0.121410 0.162101 0.442702 0.098945 0.132501 0.294107 0.190197 0.447034 0.111729 0.334226
16 BS_V3Z3DB4N 7316-3765 728273 stranded HGAT HGG, H3 wildtype, TP53 loss 7 Female 5-10 0.162427 ... 0.060295 0.104771 0.402539 0.041316 0.071960 0.239335 0.129007 0.407877 0.045677 0.300105
17 BS_5GNQC2FF 7316-2176 739936 stranded HGAT HGG, H3 wildtype 17 Female >15 -0.168451 ... -0.103250 -0.279049 -0.235817 -0.160501 -0.100873 -0.278256 -0.119185 -0.245856 0.005448 -0.248443
18 BS_6WP1FHTE 7316-1763 549574 stranded HGAT DMG, H3 K28, TP53 loss 10 Female 5-10 0.121233 ... 0.027616 0.098796 0.376402 0.039615 0.076167 0.225092 0.133182 0.373657 0.036720 0.262401
19 BS_ZD5HN296 7316-445 401741 poly-A HGAT HGG, H3 wildtype 6 Male 5-10 0.154198 ... 0.032801 0.080855 0.332752 0.043216 0.080150 0.185049 0.143249 0.357416 0.083080 0.262384
20 BS_JS95PE0J 7316-1774 574549 stranded HGAT HGG, H3 wildtype, TP53 loss 7 Male 5-10 0.156722 ... 0.081838 0.100529 0.339750 0.073451 0.112922 0.221652 0.148865 0.334720 0.125234 0.256886
21 BS_TJTEF70D 7316-895 470020 stranded HGAT HGG, H3 wildtype 3 Female 0-5 0.159911 ... 0.037145 0.068441 0.327287 0.038058 0.102268 0.223956 0.141065 0.341046 0.129624 0.274696
22 BS_M8EA6R2A 7316-913 739931 stranded HGAT HGG, H3 wildtype 3 Female 0-5 -0.111993 ... -0.106253 -0.157951 0.123723 -0.148130 -0.149451 0.011854 -0.215547 0.103965 -0.091319 0.007893
23 BS_23QW0BBA 7316-114 577714 stranded HGAT HGG, H3 wildtype 10 Female 5-10 -0.016528 ... -0.046973 -0.004073 0.280399 -0.060845 -0.058444 0.134143 -0.037533 0.273988 -0.082397 0.112136
24 BS_WKESC3XN 7316-3303 734524 stranded HGAT HGG, H3 wildtype, TP53 loss 18 Male >15 0.123707 ... 0.046442 0.081986 0.332561 0.022124 0.039491 0.157843 0.094590 0.337584 0.064393 0.235651
25 BS_4B0BAVTX 7316-2152 574550 stranded HGAT DMG, H3 K28, TP53 loss 12 Male 10-15 0.083742 ... 0.000488 0.033752 0.272690 -0.028016 0.009667 0.137883 0.070368 0.275965 0.049212 0.189988

26 rows × 2059 columns

In [3]:
# Tally how many rows carry each molecular_subtype label
print(df['molecular_subtype'].value_counts())
HGG, H3 wildtype               11
HGG, H3 wildtype, TP53 loss     5
DMG, H3 K28, TP53 loss          4
HGG, To be classified           2
DMG, H3 K28, TP53 activated     2
DMG, H3 K28                     2
Name: molecular_subtype, dtype: int64

UMAP Plot of HOPE Cohort Data

In [4]:
# Project the miRNA-activity columns of df into two dimensions with UMAP and
# draw the same embedding three times, coloured by molecular subtype,
# reported gender and age group respectively.

# Number of leading sample-metadata columns in df (Kids_First_Biospecimen_ID
# through age_group, per the table header displayed above); every column
# after them is passed to UMAP as a feature.
N_METADATA_COLS = 9

# A fixed random_state is supplied so repeated runs use the same seed.
umap_2d = UMAP(n_components=2, init='random', random_state=0)

proj_2d = umap_2d.fit_transform(df.iloc[:, N_METADATA_COLS:])

# (df column used to colour the points, wording used in the figure title)
colorings = [
    ('molecular_subtype', 'Molecular Subtype'),
    ('reported_gender', 'Patient Gender'),
    ('age_group', 'Patient Age'),
]

# Build and label every figure in one place instead of three copy-pasted stanzas.
figures = []
for column, label in colorings:
    fig = px.scatter(
        proj_2d,
        x=0,
        y=1,
        color=df[column].tolist(),
        title=f'Hope Clustering based on {label} (ActMiR)',
    )
    fig.update_xaxes(title_text='UMAP1')
    fig.update_yaxes(title_text='UMAP2')
    figures.append(fig)

# Keep the original per-figure names bound in case other cells refer to them.
fig1, fig2, fig3 = figures

# Render the figures in the same order as before: subtype, gender, age group.
for fig in figures:
    fig.show()